# produces Tables 7, 8, 10 in section 5 and Tables A5, A6 in the appendix

# Libraries
library(tidyverse)
library(ggplot2)
library(ggrepel)
library(ggpmisc)
library(broom)
library(data.table) 
library(dplyr)
library(msm)
library(plm)
library(ivreg)


wkdir <- "/Users/huiyuli/Dropbox/Entry Technology/JPE macro submission/Replication Script"


##################### Prepare data for Table 7  #################### 
data_gsp <- fread(paste(wkdir, "raw data/realStateGDP.csv",  sep="/")) %>% 
  rename(real_gsp = rgdp_haver)
data_gsp <- mutate(data_gsp, state_abbrev = toupper(state_abbrev))
data_fips <- fread(paste(wkdir, "raw data/stateFips.csv",  sep="/"))
data_gsp <- merge(data_gsp, data_fips, by.x = "state_abbrev", by.y = "stusps", all.x = TRUE, all.y = TRUE)
data_bds <- fread(paste(wkdir, "raw data/bds2020_st.csv",  sep="/"),  select = c("year", "firms", "emp",'st'))
data_gsp_bds <- merge(data_gsp, data_bds, by.x = c("st", "year"), by.y = c("st","year"), all.x = FALSE, all.y = TRUE)
#data_bds_entrant <- fread(paste(wkdir, "raw data/bds2020_st_fa.csv",  sep="/"),  
#                          select = c("year", "st", "fage", "firms", "emp"))[grep('a) 0', fage)][, c("year", "st", "firms", "emp")]
#data_bds_entrant <- as.data.frame(sapply(data_bds_entrant, as.numeric)) 
#data_bds_entrant <- mutate(data_bds_entrant, ln_emp_per_firm_entrant = log(emp / firms))
#data_gsp_bds <- merge(data_gsp_bds, data_bds_entrant[, c("year", "st", "ln_emp_per_firm_entrant")], by.x = c("st","year"), by.y = c("st","year"), all.x = TRUE, all.y = FALSE)
data_gsp_bds <-mutate(data_gsp_bds, ln_GSP_worker = log(real_gsp / emp),
                      ln_emp_per_firm = log(emp / firms),
                      ln_emp = log(emp),
                      ln_firms = log(firms))

data_gsp_bds <- data_gsp_bds[ data_gsp_bds$state_abbrev != "DC",]
data_gsp_bds$ln_firms_lag <- dplyr::lag(data_gsp_bds$ln_firms, n=1, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
firstyear = min(data_gsp_bds$year)
lastyear = max(data_gsp_bds$year)
data_gsp_bds[data_gsp_bds$year==firstyear, c("ln_firms_lag")]<- NA

# calculate 1 year change in the variables
h = 1
data_gsp_bds$ln_firms_lag_lagged1 <- dplyr::lag(data_gsp_bds$ln_firms_lag, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_lagged1 <- dplyr::lag(data_gsp_bds$ln_emp, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_per_firm_lagged1 <- dplyr::lag(data_gsp_bds$ln_emp_per_firm, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
#data_gsp_bds$ln_emp_per_firm_entrant_lagged1 <- dplyr::lag(data_gsp_bds$ln_emp_per_firm_entrant, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_GSP_worker_lagged1 <- dplyr::lag(data_gsp_bds$ln_GSP_worker, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
#data_gsp_bds[data_gsp_bds$year==firstyear+h-1, c("ln_firms_lag_lagged1", "ln_emp_lagged1","ln_emp_per_firm_lagged1","ln_emp_per_firm_entrant_lagged1","ln_GSP_worker_lagged1")]<- NA
data_gsp_bds[data_gsp_bds$year==firstyear+h-1, c("ln_firms_lag_lagged1", "ln_emp_lagged1","ln_emp_per_firm_lagged1","ln_GSP_worker_lagged1")]<- NA

# 1 year change in the regression variables
data_gsp_bds$d_ln_firms_lag_1 = data_gsp_bds$ln_firms_lag - data_gsp_bds$ln_firms_lag_lagged1
data_gsp_bds$d_ln_emp_1 = data_gsp_bds$ln_emp - data_gsp_bds$ln_emp_lagged1
data_gsp_bds$d_ln_emp_per_firm_1 = data_gsp_bds$ln_emp_per_firm -  data_gsp_bds$ln_emp_per_firm_lagged1 
#data_gsp_bds$d_ln_emp_per_firm_entrant_1 = data_gsp_bds$ln_emp_per_firm_entrant -  data_gsp_bds$ln_emp_per_firm_entrant_lagged1 
data_gsp_bds$d_ln_GSP_worker_1 = data_gsp_bds$ln_GSP_worker - data_gsp_bds$ln_GSP_worker_lagged1


# 10 year lag
h = 10
data_gsp_bds$ln_firms_lag_lagged10 <- dplyr::lag(data_gsp_bds$ln_firms_lag, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_lagged10 <- dplyr::lag(data_gsp_bds$ln_emp, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_per_firm_lagged10 <- dplyr::lag(data_gsp_bds$ln_emp_per_firm, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
#data_gsp_bds$ln_emp_per_firm_entrant_lagged10 <- dplyr::lag(data_gsp_bds$ln_emp_per_firm_entrant, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_GSP_worker_lagged10 <- dplyr::lag(data_gsp_bds$ln_GSP_worker, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
#data_gsp_bds[data_gsp_bds$year<=firstyear+h-1, c("ln_firms_lag_lagged41","ln_emp_lagged41","ln_emp_per_firm_lagged41","ln_emp_per_firm_entrant_lagged41","ln_GSP_worker_lagged41")]<- NA
data_gsp_bds[data_gsp_bds$year<=firstyear+h-1, c("ln_firms_lag_lagged10","ln_emp_lagged10","ln_emp_per_firm_lagged10","ln_GSP_worker_lagged10")]<- NA
data_gsp_bds$d_ln_firms_lag_10 = data_gsp_bds$ln_firms_lag - data_gsp_bds$ln_firms_lag_lagged10
data_gsp_bds$d_ln_emp_10 = data_gsp_bds$ln_emp - data_gsp_bds$ln_emp_lagged10
data_gsp_bds$d_ln_emp_per_firm_10 = data_gsp_bds$ln_emp_per_firm -  data_gsp_bds$ln_emp_per_firm_lagged10 
#data_gsp_bds$d_ln_emp_per_firm_entrant_10 = data_gsp_bds$ln_emp_per_firm_entrant -  data_gsp_bds$ln_emp_per_firm_entrant_lagged10 
data_gsp_bds$d_ln_GSP_worker_10 = data_gsp_bds$ln_GSP_worker - data_gsp_bds$ln_GSP_worker_lagged10

#keep none-overlapping periods
periods = c(lastyear, lastyear-(h+2), lastyear-2*(h+2))
data_gsp_bds10 <- data_gsp_bds[data_gsp_bds$year %in%  periods, c("st", "year", "d_ln_firms_lag_10","d_ln_emp_10","d_ln_emp_per_firm_10","d_ln_GSP_worker_10")] 


View(data_gsp_bds)

#####################  Table 7 #################### 
sigma = 4 
# Col 1
eq_col <- plm(d_ln_emp_per_firm_10 ~ d_ln_GSP_worker_10, data = data_gsp_bds10, index=c("st", "year"), model="within" )
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_10", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(as.numeric(summary(eq_col)$coefficients["d_ln_GSP_worker_10", "Std. Error"]), digits = 3)
phi = 0
se_phi = 0
r2 = round(as.numeric(summary(eq_col)$r.squared["rsq"]), digits = 3)
amp = -coef_prod/(sigma-1+coef_prod)*100
amp_se = as.numeric(deltamethod(~ -x1/(sigma-1+x1)*100, coef(eq_col), vcov(eq_col)))
N = as.numeric(nobs(eq_col))
table.col1 <- c(lambda,se_lambda,phi,se_phi,r2, N, amp, amp_se)

# Col 2
eq_col <- plm(d_ln_emp_per_firm_10 ~ d_ln_GSP_worker_10 + d_ln_firms_lag_10, data = data_gsp_bds10, index=c("st", "year"), model="within")
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_10", "Estimate"]
coef_firm = summary(eq_col)$coefficients["d_ln_firms_lag_10", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(as.numeric(summary(eq_col)$coefficients["d_ln_GSP_worker_10", "Std. Error"]), digits = 3)
phi = round(-coef_firm, digits= 3)
se_phi = round(as.numeric(summary(eq_col)$coefficients["d_ln_firms_lag_10", "Std. Error"]), digits = 3)
r2 = round(as.numeric(summary(eq_col)$r.squared["rsq"]), digits = 3)
amp = -coef_prod/(1+coef_firm)/(sigma-1+coef_prod/(1+coef_firm))*100
amp_se =as.numeric( deltamethod(~ -x1/(1+x2)/(sigma-1+x1/(1+x2))*100, coef(eq_col), vcov(eq_col)))
N = as.numeric(nobs(eq_col))
table.col2 <- c(lambda,se_lambda,phi,se_phi,r2,N,amp,amp_se)

# Col 3
eq_col <- plm( d_ln_emp_per_firm_1 ~ d_ln_GSP_worker_1, data = data_gsp_bds, index=c("st", "year"), model="within")
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_1", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(as.numeric(summary(eq_col)$coefficients["d_ln_GSP_worker_1", "Std. Error"]), digits = 3)
phi = 0
se_phi = 0
r2 = round(as.numeric(summary(eq_col)$r.squared["rsq"]), digits = 3)
amp = -coef_prod/(sigma-1+coef_prod)*100
amp_se = as.numeric(deltamethod(~ -x1/(sigma-1+x1)*100, coef(eq_col), vcov(eq_col)))
N = as.numeric(nobs(eq_col))
table.col3 <- c(lambda,se_lambda,phi,se_phi,r2,N,amp,amp_se)

# Col 4
eq_col <- plm(d_ln_emp_per_firm_1 ~ d_ln_GSP_worker_1 + d_ln_firms_lag_1, data = data_gsp_bds, index=c("st", "year"), model="within")
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_1", "Estimate"]
coef_firm = summary(eq_col)$coefficients["d_ln_firms_lag_1", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(as.numeric(summary(eq_col)$coefficients["d_ln_GSP_worker_1", "Std. Error"]), digits = 3)
phi = round(-coef_firm, digits= 3)
se_phi = round(as.numeric(summary(eq_col)$coefficients["d_ln_firms_lag_1", "Std. Error"]), digits = 3)
r2 = round(as.numeric(summary(eq_col)$r.squared["rsq"]), digits = 3)
amp = -coef_prod/(1+coef_firm)/(sigma-1+coef_prod/(1+coef_firm))*100
amp_se = as.numeric(deltamethod(~ -x1/(1+x2)/(sigma-1+x1/(1+x2))*100, coef(eq_col), vcov(eq_col)))
N = as.numeric(nobs(eq_col))
table.col4 <- c(lambda,se_lambda,phi,se_phi,r2,N,amp,amp_se)

table <- cbind(table.col1, table.col2, table.col3, table.col4)
table <- round(table, digits=3)
rnames <- c("lambdar", "se_lam", "phi", "se_phi", "R2", "N", "amp", "se_amp")
cnames <- c("10 years","10 years lagged","1 years","1 years lagged")
table7 <- matrix(table,nrow=8,byrow=FALSE, dimnames=list(rnames,cnames))
View(table7)

write.csv(table7,file=paste(wkdir, "output/tables/table7.csv",  sep="/"), row.names=TRUE)

##################### Prepare data for Table 8  #################### 
data_gsp <- fread(paste(wkdir, "raw data/realStateGDP.csv",  sep="/")) %>% 
  rename(real_gsp = rgdp_haver)
data_gsp <- mutate(data_gsp, state_abbrev = toupper(state_abbrev))
data_fips <- fread(paste(wkdir, "raw data/stateFips.csv",  sep="/"))
data_gsp <- merge(data_gsp, data_fips, by.x = "state_abbrev", by.y = "stusps", all.x = TRUE, all.y = TRUE)
#data_bds <- fread(paste(wkdir, "raw data/bds2020_st.csv",  sep="/"),  select = c("year", "firms", "emp",'st'))
#data_gsp_bds <- merge(data_gsp, data_bds, by.x = c("st", "year"), by.y = c("st","year"), all.x = FALSE, all.y = TRUE)
#data_bds_entrant <- fread(paste(wkdir, "raw data/bds2020_st_fa.csv",  sep="/"),  
#                          select = c("year", "st", "fage", "firms", "emp"))[grep('a) 0', fage)][, c("year", "st", "firms", "emp")]
#data_bds_entrant <- as.data.frame(sapply(data_bds_entrant, as.numeric)) 
#data_bds_entrant <- mutate(data_bds_entrant, ln_emp_per_firm_entrant = log(emp / firms))
#data_gsp_bds <- merge(data_gsp_bds, data_bds_entrant[, c("year", "st", "ln_emp_per_firm_entrant")], by.x = c("st","year"), by.y = c("st","year"), all.x = TRUE, all.y = FALSE)

# df_fInd = pd.read_csv('Input//bds2020_st_sec.csv', delimiter=',')
data_bds_sec <- fread(paste(wkdir, "raw data/bds2020_st_sec.csv",  sep="/"),  select = c("year", "firms", "emp",'st', 'sector'))
data_bds_sec[data_bds_sec$firms == '(D)', c('emp', 'firms')] = 0
state_firms <- aggregate(as.numeric(data_bds_sec$firms), by=list( data_bds_sec$st, data_bds_sec$year), FUN=sum)%>% 
  rename(st = Group.1, year = Group.2, total_firms = x)
state_emp <- aggregate(as.numeric(data_bds_sec$emp), by=list( data_bds_sec$st, data_bds_sec$year), FUN=sum)%>% 
  rename(st = Group.1, year = Group.2, total_emp = x)
data_bds_sec <- merge(data_bds_sec, state_firms, by.x = c("st", "year"), by.y = c("st","year"), all.x = TRUE, all.y = TRUE)
data_bds_sec$firm_share = as.numeric(data_bds_sec$firms)/data_bds_sec$total_firms
average_firm_share  <- aggregate(data_bds_sec$firm_share, by=list(data_bds_sec$st, data_bds_sec$sector), mean)%>% 
  rename(st = Group.1, sector = Group.2, average_firm_share = x) 
data_bds_sec <- merge(data_bds_sec, average_firm_share, by.x = c("st", "sector"), by.y = c("st","sector"), all.x = TRUE, all.y = TRUE)
data_bds_sec$emp_firm_sec <- as.numeric(data_bds_sec$emp)/as.numeric(data_bds_sec$firms)
data_bds_fixedInd <-  aggregate(data_bds_sec$emp_firm_sec*data_bds_sec$average_firm_share, by=list(data_bds_sec$st, data_bds_sec$year), FUN=sum)%>% 
  rename(st = Group.1, year = Group.2, emp_per_firm_fixedInd = x)
data_bds_fixedInd <-  merge(data_bds_fixedInd, state_firms, by.x = c("st", "year"), by.y = c("st","year"), all.x = TRUE, all.y = TRUE)  %>%   rename(firms = total_firms )
data_bds_fixedInd <-  merge(data_bds_fixedInd, state_emp, by.x = c("st", "year"), by.y = c("st","year"), all.x = TRUE, all.y = TRUE) %>%   rename(emp = total_emp )

data_gsp_bds <- merge(data_gsp, data_bds_fixedInd, by.x = c("st", "year"), by.y = c("st","year"), all.x = FALSE, all.y = TRUE)

data_gsp_bds <-mutate(data_gsp_bds, ln_GSP_worker = log(real_gsp / emp),
                      ln_emp_per_firm = log(emp / firms),
                      ln_emp_per_firm_fixedInd = log(emp_per_firm_fixedInd),
                      ln_emp = log(emp),
                      ln_firms = log(firms))


data_gsp_bds <- data_gsp_bds[ data_gsp_bds$state_abbrev != "DC",]
data_gsp_bds$ln_firms_lag <- dplyr::lag(data_gsp_bds$ln_firms, n=1, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
firstyear = min(data_gsp_bds$year)
data_gsp_bds[data_gsp_bds$year==firstyear, c("ln_firms_lag")]<- NA

# calculate 1 year change in the variables
h = 1
data_gsp_bds$ln_firms_lag_lagged1 <- dplyr::lag(data_gsp_bds$ln_firms_lag, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_lagged1 <- dplyr::lag(data_gsp_bds$ln_emp, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_per_firm_lagged1 <- dplyr::lag(data_gsp_bds$ln_emp_per_firm, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_per_firm_fixedInd_lagged1 <- dplyr::lag(data_gsp_bds$ln_emp_per_firm_fixedInd, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_GSP_worker_lagged1 <- dplyr::lag(data_gsp_bds$ln_GSP_worker, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds[data_gsp_bds$year==firstyear+h-1, c("ln_firms_lag_lagged1", "ln_emp_lagged1","ln_emp_per_firm_lagged1","ln_emp_per_firm_entrant_lagged1","ln_GSP_worker_lagged1")]<- NA

# 1 year change in the regression variables
data_gsp_bds$d_ln_firms_lag_1 = data_gsp_bds$ln_firms_lag - data_gsp_bds$ln_firms_lag_lagged1
data_gsp_bds$d_ln_emp_1 = data_gsp_bds$ln_emp - data_gsp_bds$ln_emp_lagged1
data_gsp_bds$d_ln_emp_per_firm_1 = data_gsp_bds$ln_emp_per_firm -  data_gsp_bds$ln_emp_per_firm_lagged1 
data_gsp_bds$d_ln_emp_per_firm_fixedInd_1 = data_gsp_bds$ln_emp_per_firm_fixedInd -  data_gsp_bds$ln_emp_per_firm_fixedInd_lagged1 
data_gsp_bds$d_ln_GSP_worker_1 = data_gsp_bds$ln_GSP_worker - data_gsp_bds$ln_GSP_worker_lagged1


# 41 year lag
h = 41
data_gsp_bds$ln_firms_lag_lagged41 <- dplyr::lag(data_gsp_bds$ln_firms_lag, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_lagged41 <- dplyr::lag(data_gsp_bds$ln_emp, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_per_firm_lagged41 <- dplyr::lag(data_gsp_bds$ln_emp_per_firm, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_per_firm_fixedInd_lagged41 <- dplyr::lag(data_gsp_bds$ln_emp_per_firm_fixedInd, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_GSP_worker_lagged41 <- dplyr::lag(data_gsp_bds$ln_GSP_worker, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds[data_gsp_bds$year<=firstyear+h-1, c("ln_firms_lag_lagged41","ln_emp_lagged41","ln_emp_per_firm_lagged41","ln_emp_per_firm_fixedInd_lagged41","ln_GSP_worker_lagged41")]<- NA

# 41 year change in the regression variables
data_gsp_bds$d_ln_firms_lag_41 = data_gsp_bds$ln_firms_lag - data_gsp_bds$ln_firms_lag_lagged41
data_gsp_bds$d_ln_emp_41 = data_gsp_bds$ln_emp - data_gsp_bds$ln_emp_lagged41
data_gsp_bds$d_ln_emp_per_firm_41 = data_gsp_bds$ln_emp_per_firm -  data_gsp_bds$ln_emp_per_firm_lagged41 
data_gsp_bds$d_ln_emp_per_firm_fixedInd_41 = data_gsp_bds$ln_emp_per_firm_fixedInd -  data_gsp_bds$ln_emp_per_firm_fixedInd_lagged41 
data_gsp_bds$d_ln_GSP_worker_41 = data_gsp_bds$ln_GSP_worker - data_gsp_bds$ln_GSP_worker_lagged41
View(data_gsp_bds)

#####################  Table 8 ####################
sigma = 4
# Col 1
eq_col <- lm( d_ln_emp_per_firm_fixedInd_41 ~ d_ln_GSP_worker_41, data = data_gsp_bds)
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_41", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(summary(eq_col)$coefficients["d_ln_GSP_worker_41", "Std. Error"], digits = 3)
phi = 0
se_phi = 0
r2 = round(summary(eq_col)$r.squared, digits = 3)
amp = -coef_prod/(sigma-1+coef_prod)*100
amp_se = deltamethod(~ -x2/(sigma-1+x2)*100, coef(eq_col), vcov(eq_col))
table.col1 <- c(lambda,se_lambda,phi,se_phi,r2,nobs(eq_col),amp,amp_se)

# Col 2
eq_col <- lm(d_ln_emp_per_firm_fixedInd_41 ~ d_ln_GSP_worker_41 + d_ln_firms_lag_41, data = data_gsp_bds)
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_41", "Estimate"]
coef_firm = summary(eq_col)$coefficients["d_ln_firms_lag_41", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(summary(eq_col)$coefficients["d_ln_GSP_worker_41", "Std. Error"], digits = 3)
phi = round(-coef_firm, digits= 3)
se_phi = round(summary(eq_col)$coefficients["d_ln_firms_lag_41", "Std. Error"], digits = 3)
r2 = round(summary(eq_col)$r.squared, digits = 3)
amp = -coef_prod/(1+coef_firm)/(sigma-1+coef_prod/(1+coef_firm))*100
amp_se = deltamethod(~ -x2/(1+x3)/(sigma-1+x2/(1+x3))*100, coef(eq_col), vcov(eq_col))
table.col2 <- c(lambda,se_lambda,phi,se_phi,r2,nobs(eq_col),amp,amp_se)

# Col 3
eq_col <- lm( d_ln_emp_per_firm_fixedInd_1 ~ d_ln_GSP_worker_1, data = data_gsp_bds)
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_1", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(summary(eq_col)$coefficients["d_ln_GSP_worker_1", "Std. Error"], digits = 3)
phi = 0
se_phi = 0
r2 = round(summary(eq_col)$r.squared, digits = 3)
amp = -coef_prod/(sigma-1+coef_prod)*100
amp_se = deltamethod(~ -x2/(sigma-1+x2)*100, coef(eq_col), vcov(eq_col))
table.col3 <- c(lambda,se_lambda,phi,se_phi,r2,nobs(eq_col),amp,amp_se)

# Col 4
eq_col <- lm(d_ln_emp_per_firm_fixedInd_1 ~ d_ln_GSP_worker_1 + d_ln_firms_lag_1, data = data_gsp_bds)
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_1", "Estimate"]
coef_firm = summary(eq_col)$coefficients["d_ln_firms_lag_1", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(summary(eq_col)$coefficients["d_ln_GSP_worker_1", "Std. Error"], digits = 3)
phi = round(-coef_firm, digits= 3)
se_phi = round(summary(eq_col)$coefficients["d_ln_firms_lag_1", "Std. Error"], digits = 3)
r2 = round(summary(eq_col)$r.squared, digits = 3)
amp = -coef_prod/(1+coef_firm)/(sigma-1+coef_prod/(1+coef_firm))*100
amp_se = deltamethod(~ -x2/(1+x3)/(sigma-1+x2/(1+x3))*100, coef(eq_col), vcov(eq_col))
table.col4 <- c(lambda,se_lambda,phi,se_phi,r2,nobs(eq_col),amp,amp_se)

table <- cbind(table.col1, table.col2, table.col3, table.col4)
table <- round(table, digits=3)
rnames <- c("lambdar", "se", "phi", "se", "R2", "N", "amp", "se")
cnames <- c("41 years","41 years lagged N","1 years","1 years lagged N")
table8 <- matrix(table,nrow=8,byrow=FALSE,dimnames=list(rnames,cnames))
View(table8)

# save regression results
write.csv(table8,file=paste(wkdir, "output/tables/table8.csv",  sep="/"), row.names=TRUE)


###################### Prepare data for Table A5 #################### 
data_gsp <- fread(paste(wkdir, "raw data/realStateGDP.csv",  sep="/")) %>% 
  rename(real_gsp = rgdp_haver)
data_gsp <- mutate(data_gsp, state_abbrev = toupper(state_abbrev))
data_fips <- fread(paste(wkdir, "raw data/stateFips.csv",  sep="/"))
data_gsp <- merge(data_gsp, data_fips, by.x = "state_abbrev", by.y = "stusps", all.x = TRUE, all.y = TRUE)
data_bds <- fread(paste(wkdir, "raw data/bds2020_st.csv",  sep="/"),  select = c("year", "firms", "emp",'st'))
data_gsp_bds <- merge(data_gsp, data_bds, by.x = c("st", "year"), by.y = c("st","year"), all.x = FALSE, all.y = TRUE)
data_bds_small <- fread(paste(wkdir, "raw data/bds2020_st_fz.csv",  sep="/"),  
                          select = c("year", "st", "fsize", "firms", "emp"))[grep('a) 1 to 4', fsize)][, c("year", "st", "firms", "emp")]
data_bds_small <- as.data.frame(sapply(data_bds_small, as.numeric)) 
data_bds_small <- mutate(data_bds_small, ln_emp_per_firm_small = log(emp / firms))
data_gsp_bds <- merge(data_gsp_bds, data_bds_small[, c("year", "st", "ln_emp_per_firm_small")], by.x = c("st","year"), by.y = c("st","year"), all.x = TRUE, all.y = FALSE)
data_gsp_bds <-mutate(data_gsp_bds, ln_GSP_worker = log(real_gsp / emp),
                      ln_emp_per_firm = log(emp / firms),
                      ln_emp = log(emp),
                      ln_firms = log(firms))

data_gsp_bds <- data_gsp_bds[ data_gsp_bds$state_abbrev != "DC",]
data_gsp_bds$ln_firms_lag <- dplyr::lag(data_gsp_bds$ln_firms, n=1, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
firstyear = min(data_gsp_bds$year)
data_gsp_bds[data_gsp_bds$year==firstyear, c("ln_firms_lag")]<- NA

# calculate 1 year change in the variables
h = 1
data_gsp_bds$ln_firms_lag_lagged1 <- dplyr::lag(data_gsp_bds$ln_firms_lag, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_lagged1 <- dplyr::lag(data_gsp_bds$ln_emp, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_per_firm_lagged1 <- dplyr::lag(data_gsp_bds$ln_emp_per_firm, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_per_firm_small_lagged1 <- dplyr::lag(data_gsp_bds$ln_emp_per_firm_small, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_GSP_worker_lagged1 <- dplyr::lag(data_gsp_bds$ln_GSP_worker, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds[data_gsp_bds$year==firstyear+h-1, c("ln_firms_lag_lagged1", "ln_emp_lagged1","ln_emp_per_firm_lagged1","ln_emp_per_firm_small_lagged1","ln_GSP_worker_lagged1")]<- NA

# 1 year change in the regression variables
data_gsp_bds$d_ln_firms_lag_1 = data_gsp_bds$ln_firms_lag - data_gsp_bds$ln_firms_lag_lagged1
data_gsp_bds$d_ln_emp_1 = data_gsp_bds$ln_emp - data_gsp_bds$ln_emp_lagged1
data_gsp_bds$d_ln_emp_per_firm_1 = data_gsp_bds$ln_emp_per_firm -  data_gsp_bds$ln_emp_per_firm_lagged1 
data_gsp_bds$d_ln_emp_per_firm_small_1 = data_gsp_bds$ln_emp_per_firm_small -  data_gsp_bds$ln_emp_per_firm_small_lagged1 
data_gsp_bds$d_ln_GSP_worker_1 = data_gsp_bds$ln_GSP_worker - data_gsp_bds$ln_GSP_worker_lagged1


# 41 year lag
h = 41
data_gsp_bds$ln_firms_lag_lagged41 <- dplyr::lag(data_gsp_bds$ln_firms_lag, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_lagged41 <- dplyr::lag(data_gsp_bds$ln_emp, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_per_firm_lagged41 <- dplyr::lag(data_gsp_bds$ln_emp_per_firm, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_per_firm_small_lagged41 <- dplyr::lag(data_gsp_bds$ln_emp_per_firm_small, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_GSP_worker_lagged41 <- dplyr::lag(data_gsp_bds$ln_GSP_worker, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds[data_gsp_bds$year<=firstyear+h-1, c("ln_firms_lag_lagged41","ln_emp_lagged41","ln_emp_per_firm_lagged41","ln_emp_per_firm_small_lagged41","ln_GSP_worker_lagged41")]<- NA

# 41 year change in the regression variables
data_gsp_bds$d_ln_firms_lag_41 = data_gsp_bds$ln_firms_lag - data_gsp_bds$ln_firms_lag_lagged41
data_gsp_bds$d_ln_emp_41 = data_gsp_bds$ln_emp - data_gsp_bds$ln_emp_lagged41
data_gsp_bds$d_ln_emp_per_firm_41 = data_gsp_bds$ln_emp_per_firm -  data_gsp_bds$ln_emp_per_firm_lagged41 
data_gsp_bds$d_ln_emp_per_firm_small_41 = data_gsp_bds$ln_emp_per_firm_small -  data_gsp_bds$ln_emp_per_firm_small_lagged41 
data_gsp_bds$d_ln_GSP_worker_41 = data_gsp_bds$ln_GSP_worker - data_gsp_bds$ln_GSP_worker_lagged41
View(data_gsp_bds)

#####################  Table A5 #################### 
sigma = 4 
# Col 1
eq_col <- lm( d_ln_emp_per_firm_small_41 ~ d_ln_GSP_worker_41, data = data_gsp_bds)
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_41", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(summary(eq_col)$coefficients["d_ln_GSP_worker_41", "Std. Error"], digits = 3)
phi = 0
se_phi = 0
r2 = round(summary(eq_col)$r.squared, digits = 3)
amp = -coef_prod/(sigma-1+coef_prod)*100
amp_se = deltamethod(~ -x2/(sigma-1+x2)*100, coef(eq_col), vcov(eq_col))
table.col1 <- c(lambda,se_lambda,phi,se_phi,r2,nobs(eq_col),amp,amp_se)

# Col 2
eq_col <- lm(d_ln_emp_per_firm_small_41 ~ d_ln_GSP_worker_41 + d_ln_firms_lag_41, data = data_gsp_bds)
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_41", "Estimate"]
coef_firm = summary(eq_col)$coefficients["d_ln_firms_lag_41", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(summary(eq_col)$coefficients["d_ln_GSP_worker_41", "Std. Error"], digits = 3)
phi = round(-coef_firm, digits= 3)
se_phi = round(summary(eq_col)$coefficients["d_ln_firms_lag_41", "Std. Error"], digits = 3)
r2 = round(summary(eq_col)$r.squared, digits = 3)
amp = -coef_prod/(1+coef_firm)/(sigma-1+coef_prod/(1+coef_firm))*100
amp_se = deltamethod(~ -x2/(1+x3)/(sigma-1+x2/(1+x3))*100, coef(eq_col), vcov(eq_col))
table.col2 <- c(lambda,se_lambda,phi,se_phi,r2,nobs(eq_col),amp,amp_se)

# Col 3
eq_col <- lm( d_ln_emp_per_firm_small_1 ~ d_ln_GSP_worker_1, data = data_gsp_bds)
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_1", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(summary(eq_col)$coefficients["d_ln_GSP_worker_1", "Std. Error"], digits = 3)
phi = 0
se_phi = 0
r2 = round(summary(eq_col)$r.squared, digits = 3)
amp = -coef_prod/(sigma-1+coef_prod)*100
amp_se = deltamethod(~ -x2/(sigma-1+x2)*100, coef(eq_col), vcov(eq_col))
table.col3 <- c(lambda,se_lambda,phi,se_phi,r2,nobs(eq_col),amp,amp_se)

# Col 4
eq_col <- lm(d_ln_emp_per_firm_small_1 ~ d_ln_GSP_worker_1 + d_ln_firms_lag_1, data = data_gsp_bds)
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_1", "Estimate"]
coef_firm = summary(eq_col)$coefficients["d_ln_firms_lag_1", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(summary(eq_col)$coefficients["d_ln_GSP_worker_1", "Std. Error"], digits = 3)
phi = round(-coef_firm, digits= 3)
se_phi = round(summary(eq_col)$coefficients["d_ln_firms_lag_1", "Std. Error"], digits = 3)
r2 = round(summary(eq_col)$r.squared, digits = 3)
amp = -coef_prod/(1+coef_firm)/(sigma-1+coef_prod/(1+coef_firm))*100
amp_se = deltamethod(~ -x2/(1+x3)/(sigma-1+x2/(1+x3))*100, coef(eq_col), vcov(eq_col))
table.col4 <- c(lambda,se_lambda,phi,se_phi,r2,nobs(eq_col),amp,amp_se)

table <- cbind(table.col1, table.col2, table.col3, table.col4)
table <- round(table, digits=3)
rnames <- c("lambdar", "se", "phi", "se", "R2", "N", "amp", "se")
cnames <- c("41 years","41 years lagged N","1 years","1 years lagged N")
tableA5 <- matrix(table,nrow=8,byrow=FALSE,dimnames=list(rnames,cnames))
View(tableA5)

# save regression results
write.csv(tableA5,file=paste(wkdir, "output/tables/tableA5.csv",  sep="/"), row.names=TRUE)


###################### Prepare data for Table 10 and A6 #################### 
data_gsp <- fread(paste(wkdir, "raw data/realStateGDP.csv",  sep="/")) %>% 
  rename(real_gsp = rgdp_haver)
data_gsp <- mutate(data_gsp, state_abbrev = toupper(state_abbrev))
data_fips <- fread(paste(wkdir, "raw data/stateFips.csv",  sep="/"))
data_gsp <- merge(data_gsp, data_fips, by.x = "state_abbrev", by.y = "stusps", all.x = TRUE, all.y = TRUE)
data_bds <- fread(paste(wkdir, "raw data/bds2020_st.csv",  sep="/"),  select = c("year", "firms", "emp",'st'))
data_gsp_bds <- merge(data_gsp, data_bds, by.x = c("st", "year"), by.y = c("st","year"), all.x = FALSE, all.y = TRUE)
data_cbp <- fread(paste(wkdir, "raw data/Census_CBP/cbp_1986_2020.csv",  sep="/"),  
                        select = c("year", "st", "emp_cbp"))
data_gsp_bds <- merge(data_gsp_bds, data_cbp, by.x = c("st","year"), by.y = c("st","year"), all.x = TRUE, all.y = TRUE)
data_gsp_bds <-mutate(data_gsp_bds,  ln_GSP_worker = log(real_gsp / emp), 
                      ln_GSP_worker_cbp = log(real_gsp / emp_cbp),
                      ln_emp_per_firm = log(emp / firms),
                      ln_emp = log(emp),
                      ln_firms = log(firms))

data_gsp_bds <- data_gsp_bds[ data_gsp_bds$state_abbrev != "DC",]
data_gsp_bds$ln_firms_lag <- dplyr::lag(data_gsp_bds$ln_firms, n=1, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
firstyear = min(data_gsp_bds$year)
data_gsp_bds[data_gsp_bds$year==firstyear, c("ln_firms_lag")]<- NA
firstyear_cbp = 1986
data_gsp_bds <- data_gsp_bds[data_gsp_bds$year>=firstyear_cbp, ]
firstyear = min(data_gsp_bds$year)

# calculate 1 year change in the variables
h = 1
data_gsp_bds$ln_firms_lag_lagged1 <- dplyr::lag(data_gsp_bds$ln_firms_lag, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_lagged1 <- dplyr::lag(data_gsp_bds$ln_emp, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_per_firm_lagged1 <- dplyr::lag(data_gsp_bds$ln_emp_per_firm, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_GSP_worker_lagged1 <- dplyr::lag(data_gsp_bds$ln_GSP_worker, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_GSP_worker_cbp_lagged1 <- dplyr::lag(data_gsp_bds$ln_GSP_worker_cbp, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))

data_gsp_bds[data_gsp_bds$year==firstyear+h-1, c("ln_firms_lag_lagged1", "ln_emp_lagged1","ln_emp_per_firm_lagged1","ln_GSP_worker_lagged1","ln_GSP_worker_cbp_lagged1")]<- NA

# 1 year change in the regression variables
data_gsp_bds$d_ln_firms_lag_1 = data_gsp_bds$ln_firms_lag - data_gsp_bds$ln_firms_lag_lagged1
data_gsp_bds$d_ln_emp_1 = data_gsp_bds$ln_emp - data_gsp_bds$ln_emp_lagged1
data_gsp_bds$d_ln_emp_per_firm_1 = data_gsp_bds$ln_emp_per_firm -  data_gsp_bds$ln_emp_per_firm_lagged1 
data_gsp_bds$d_ln_GSP_worker_1 = data_gsp_bds$ln_GSP_worker - data_gsp_bds$ln_GSP_worker_lagged1
data_gsp_bds$d_ln_GSP_worker_cbp_1 = data_gsp_bds$ln_GSP_worker_cbp - data_gsp_bds$ln_GSP_worker_cbp_lagged1


# 34 year lag
h = 34
data_gsp_bds$ln_firms_lag_lagged34 <- dplyr::lag(data_gsp_bds$ln_firms_lag, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_lagged34 <- dplyr::lag(data_gsp_bds$ln_emp, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_emp_per_firm_lagged34 <- dplyr::lag(data_gsp_bds$ln_emp_per_firm, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_GSP_worker_lagged34 <- dplyr::lag(data_gsp_bds$ln_GSP_worker, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds$ln_GSP_worker_cbp_lagged34 <- dplyr::lag(data_gsp_bds$ln_GSP_worker_cbp, n=h, order_by = cbind( data_gsp_bds$st, data_gsp_bds$year))
data_gsp_bds[data_gsp_bds$year<=firstyear+h-1, c("ln_firms_lag_lagged34","ln_emp_lagged34","ln_emp_per_firm_lagged34","ln_GSP_worker_lagged34","ln_GSP_worker_cbp_lagged34")]<- NA

# 34 year change in the regression variables
data_gsp_bds$d_ln_firms_lag_34 = data_gsp_bds$ln_firms_lag - data_gsp_bds$ln_firms_lag_lagged34
data_gsp_bds$d_ln_emp_34 = data_gsp_bds$ln_emp - data_gsp_bds$ln_emp_lagged34
data_gsp_bds$d_ln_emp_per_firm_34 = data_gsp_bds$ln_emp_per_firm -  data_gsp_bds$ln_emp_per_firm_lagged34 
data_gsp_bds$d_ln_GSP_worker_34 = data_gsp_bds$ln_GSP_worker - data_gsp_bds$ln_GSP_worker_lagged34
data_gsp_bds$d_ln_GSP_worker_cbp_34 = data_gsp_bds$ln_GSP_worker_cbp - data_gsp_bds$ln_GSP_worker_cbp_lagged34
View(data_gsp_bds)

#####################  Table 10 #################### 
sigma = 4 
# Col 1
eq_col <- lm( d_ln_emp_per_firm_34 ~ d_ln_GSP_worker_cbp_34, data = data_gsp_bds)
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_cbp_34", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(summary(eq_col)$coefficients["d_ln_GSP_worker_cbp_34", "Std. Error"], digits = 3)
phi = 0
se_phi = 0
r2 = round(summary(eq_col)$r.squared, digits = 3)
amp = -coef_prod/(sigma-1+coef_prod)*100
amp_se = deltamethod(~ -x2/(sigma-1+x2)*100, coef(eq_col), vcov(eq_col))
table.col1 <- c(lambda,se_lambda,phi,se_phi,r2,nobs(eq_col),amp,amp_se)

# Col 2
eq_col <- lm(d_ln_emp_per_firm_34 ~ d_ln_GSP_worker_cbp_34 + d_ln_firms_lag_34, data = data_gsp_bds)
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_cbp_34", "Estimate"]
coef_firm = summary(eq_col)$coefficients["d_ln_firms_lag_34", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(summary(eq_col)$coefficients["d_ln_GSP_worker_cbp_34", "Std. Error"], digits = 3)
phi = round(-coef_firm, digits= 3)
se_phi = round(summary(eq_col)$coefficients["d_ln_firms_lag_34", "Std. Error"], digits = 3)
r2 = round(summary(eq_col)$r.squared, digits = 3)
amp = -coef_prod/(1+coef_firm)/(sigma-1+coef_prod/(1+coef_firm))*100
amp_se = deltamethod(~ -x2/(1+x3)/(sigma-1+x2/(1+x3))*100, coef(eq_col), vcov(eq_col))
table.col2 <- c(lambda,se_lambda,phi,se_phi,r2,nobs(eq_col),amp,amp_se)

# Col 3
eq_col <- lm( d_ln_emp_per_firm_1 ~ d_ln_GSP_worker_cbp_1, data = data_gsp_bds)
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_cbp_1", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(summary(eq_col)$coefficients["d_ln_GSP_worker_cbp_1", "Std. Error"], digits = 3)
phi = 0
se_phi = 0
r2 = round(summary(eq_col)$r.squared, digits = 3)
amp = -coef_prod/(sigma-1+coef_prod)*100
amp_se = deltamethod(~ -x2/(sigma-1+x2)*100, coef(eq_col), vcov(eq_col))
table.col3 <- c(lambda,se_lambda,phi,se_phi,r2,nobs(eq_col),amp,amp_se)

# Col 4
eq_col <- lm(d_ln_emp_per_firm_1 ~ d_ln_GSP_worker_cbp_1 + d_ln_firms_lag_1, data = data_gsp_bds)
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_cbp_1", "Estimate"]
coef_firm = summary(eq_col)$coefficients["d_ln_firms_lag_1", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(summary(eq_col)$coefficients["d_ln_GSP_worker_cbp_1", "Std. Error"], digits = 3)
phi = round(-coef_firm, digits= 3)
se_phi = round(summary(eq_col)$coefficients["d_ln_firms_lag_1", "Std. Error"], digits = 3)
r2 = round(summary(eq_col)$r.squared, digits = 3)
amp = -coef_prod/(1+coef_firm)/(sigma-1+coef_prod/(1+coef_firm))*100
amp_se = deltamethod(~ -x2/(1+x3)/(sigma-1+x2/(1+x3))*100, coef(eq_col), vcov(eq_col))
table.col4 <- c(lambda,se_lambda,phi,se_phi,r2,nobs(eq_col),amp,amp_se)

table <- cbind(table.col1, table.col2, table.col3, table.col4)
table <- round(table, digits=3)
rnames <- c("lambdar", "se", "phi", "se", "R2", "N", "amp", "se")
cnames <- c("34 years","34 years lagged N","1 years","1 years lagged N")
table10 <- matrix(table,nrow=8,byrow=FALSE,dimnames=list(rnames,cnames))
View(table10)


# save regression results
write.csv(table10,file=paste(wkdir, "output/tables/table10.csv",  sep="/"), row.names=TRUE)


#####################  Table A6 #################### 
sigma = 4 
# Col 1
eq_col <- ivreg( d_ln_emp_per_firm_34 ~ d_ln_GSP_worker_34 | d_ln_GSP_worker_cbp_34, data = data_gsp_bds)
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_34", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(summary(eq_col)$coefficients["d_ln_GSP_worker_34", "Std. Error"], digits = 3)
phi = 0
se_phi = 0
r2 = round(summary(eq_col)$r.squared, digits = 3)
amp = -coef_prod/(sigma-1+coef_prod)*100
amp_se = deltamethod(~ -x2/(sigma-1+x2)*100, coef(eq_col), vcov(eq_col))
table.col1 <- c(lambda,se_lambda,phi,se_phi,r2,nobs(eq_col),amp,amp_se)

# Col 2
eq_col <- ivreg(d_ln_emp_per_firm_34 ~ d_ln_GSP_worker_34 + d_ln_firms_lag_34 | d_ln_GSP_worker_cbp_34 + d_ln_firms_lag_34, data = data_gsp_bds)
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_34", "Estimate"]
coef_firm = summary(eq_col)$coefficients["d_ln_firms_lag_34", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(summary(eq_col)$coefficients["d_ln_GSP_worker_34", "Std. Error"], digits = 3)
phi = round(-coef_firm, digits= 3)
se_phi = round(summary(eq_col)$coefficients["d_ln_firms_lag_34", "Std. Error"], digits = 3)
r2 = round(summary(eq_col)$r.squared, digits = 3)
amp = -coef_prod/(1+coef_firm)/(sigma-1+coef_prod/(1+coef_firm))*100
amp_se = deltamethod(~ -x2/(1+x3)/(sigma-1+x2/(1+x3))*100, coef(eq_col), vcov(eq_col))
table.col2 <- c(lambda,se_lambda,phi,se_phi,r2,nobs(eq_col),amp,amp_se)

# Col 3
eq_col <- ivreg( d_ln_emp_per_firm_1 ~ d_ln_GSP_worker_1 | d_ln_GSP_worker_cbp_1 , data = data_gsp_bds)
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_1", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(summary(eq_col)$coefficients["d_ln_GSP_worker_1", "Std. Error"], digits = 3)
phi = 0
se_phi = 0
r2 = round(summary(eq_col)$r.squared, digits = 3)
amp = -coef_prod/(sigma-1+coef_prod)*100
amp_se = deltamethod(~ -x2/(sigma-1+x2)*100, coef(eq_col), vcov(eq_col))
table.col3 <- c(lambda,se_lambda,phi,se_phi,r2,nobs(eq_col),amp,amp_se)

# Col 4
eq_col <- ivreg(d_ln_emp_per_firm_1 ~ d_ln_GSP_worker_1 + d_ln_firms_lag_1 | d_ln_GSP_worker_cbp_1 + d_ln_firms_lag_1, data = data_gsp_bds)
summary(eq_col)
coef_prod = summary(eq_col)$coefficients["d_ln_GSP_worker_1", "Estimate"]
coef_firm = summary(eq_col)$coefficients["d_ln_firms_lag_1", "Estimate"]
lambda = round(coef_prod+1, digits= 3)
se_lambda = round(summary(eq_col)$coefficients["d_ln_GSP_worker_1", "Std. Error"], digits = 3)
phi = round(-coef_firm, digits= 3)
se_phi = round(summary(eq_col)$coefficients["d_ln_firms_lag_1", "Std. Error"], digits = 3)
r2 = round(summary(eq_col)$r.squared, digits = 3)
amp = -coef_prod/(1+coef_firm)/(sigma-1+coef_prod/(1+coef_firm))*100
amp_se = deltamethod(~ -x2/(1+x3)/(sigma-1+x2/(1+x3))*100, coef(eq_col), vcov(eq_col))
table.col4 <- c(lambda,se_lambda,phi,se_phi,r2,nobs(eq_col),amp,amp_se)

table <- cbind(table.col1, table.col2, table.col3, table.col4)
table <- round(table, digits=3)
rnames <- c("lambdar", "se", "phi", "se", "R2", "N", "amp", "se")
cnames <- c("34 years","34 years lagged N","1 years","1 years lagged N")
tableA6 <- matrix(table,nrow=8,byrow=FALSE,dimnames=list(rnames,cnames))
View(tableA6)


# save regression results
write.csv(tableA6,file=paste(wkdir, "output/tables/tableA6.csv",  sep="/"), row.names=TRUE)


